In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter1d
import seaborn as sns
import matplotlib as mpl
from matplotlib.lines import Line2D
import yaml
from scipy import stats

import sys
sys.path.append('../utils/')
from ImagingUtilities import *

import warnings
warnings.filterwarnings('ignore')

from scipy.optimize import curve_fit
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import mean_squared_log_error, r2_score

with open("../data/resources/rcParams.yaml") as f:
    rcParamsDict = yaml.full_load(f)
    for k in rcParamsDict["rcParams"]:
        print("{} {}".format(k,rcParamsDict["rcParams"][k]))
        plt.rcParams[k] = rcParamsDict["rcParams"][k]
    for k1 in set(list(rcParamsDict)).difference(set(["rcParams"])):
        print("{} {}".format(k1,rcParamsDict[k1]))
figure.dpi 80
savefig.dpi 500
figure.figsize [10, 10]
axes.facecolor None
figure.facecolor None
dotSize 20
In [2]:
line_palette = {
 'CTL01A': '#DBB807',
    'CTL08A': '#0FB248',
    'CTL04E': '#FF0054',
    'CTL02A': '#7B00FF',
'H9': '#72190E',
 'H1': '#994F88',
 'CTL05A': '#1965B0',
 'CTL07C': '#437DBF',
 'CTL06F': '#CAE0AB',
 'CTL09A': '#FFFF00',
 'KTD8.2': '#E65518',
 'UCSFi001-A': '#7BAFDE'}

Load data¶

In [3]:
total_df = pd.read_csv('../../iPSC_imaging/quantifications/quantification.csv', index_col=0)
add_tp = pd.read_csv('../../iPSC_imaging/quantifications/quantification_addTP.csv', index_col=0)
total_df[ ~ total_df.duplicated()]
total_df = pd.concat([total_df, add_tp])

donor_map_names = {i:j for i, j in zip(total_df['line'], total_df['line'])}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
total_df['line'] = total_df['line'].map(donor_map_names)
total_df.shape
Out[3]:
(2776, 16)
In [4]:
total_df['pixel_size'] = 1.38
In [5]:
total_df['Area (microm2)'] = total_df.total_area * (1.38**2)
total_df['Area (mm2)'] = total_df['Area (microm2)'] * ((1.38 / 1000)**2)
In [6]:
total_df['line'].unique()
Out[6]:
array(['H1', 'CTL04E', 'CTL02A', 'CTL05A', 'H9', 'KTD8.2', 'CTL09A',
       'CTL06F', 'CTL08A', 'CTL07C', 'UCSFi001-A', 'CTL01A'], dtype=object)
In [7]:
all_tp = total_df.time_point.unique()
all_tp.sort()
all_tp
Out[7]:
array(['01_11_23_t18', '02_11_23_t18', '02_11_23_t9', '03_11_23_t18',
       '03_11_23_t9', '04_11_23_t10', '04_11_23_t18', '05_11_23_t18',
       '05_11_23_t9', '06_11_23_t18', '06_11_23_t9', '07_11_23_t18',
       '07_11_23_t9', '08_11_23_t18', '08_11_23_t9', '09_11_23_t20',
       '09_11_23_t9', '10_11_23_t18', '10_11_23_t9', '11_11_23_t18',
       '11_11_23_t9', '12_11_23_t13', '12_11_23_t18', '13_11_23_t18',
       '13_11_23_t9', '14_11_23_t17', '14_11_23_t9', '15_11_23_t17',
       '15_11_23_t9', '16_11_23_t10', '16_11_23_t18', '17_11_23_t17',
       '17_11_23_t9', '18_11_23_t10', '18_11_23_t18', '19_11_23_t10',
       '19_11_23_t18', '20_11_23_t18', '20_11_23_t9', '21_11_23_t17',
       '21_11_23_t9', '22_11_23_t10', '31_10_23_t18', '31_10_23_t9'],
      dtype=object)

Here I'm adding a few quantifications:

  1. norm_factor: the normalization factor, corresponding to the mean area of the area percentage of all lines for each time point post split (called split_time)
  2. perc_area_norm: the normalized percentage area, corresponding to the percentage area divided by the the normalization factor (1.)
  3. mean_area_tp: the mean area of each line at each time point post split (called split_time)
  4. area_error: the percentage "error" of the total area computed with respect to the mean of that line at that time point
  5. std: the standard deviation of each area with respect to (3.)
  6. cv: the coefficient of variation, corresponding to the ratio between the standard deviation and the mean
In [8]:
total_df['line_split'] = total_df['line'].astype('str') + '_' + total_df['split_time'].astype('str')

mean_df_time_point = total_df.groupby(['split_time']).mean('perc_area')
mean_df_time_point_dict = {i:j for i, j in zip(mean_df_time_point.index, mean_df_time_point.perc_area)}
mean_df_time_point_dict

area_df_time_point = total_df.groupby(['line','split_time']).mean('Area (microm2)').reset_index()
area_df_time_point['line_split'] = area_df_time_point['line'].astype('str') + '_' + area_df_time_point['split_time'].astype('str')
area_df_time_point = {i:j for i, j in zip(area_df_time_point.line_split, area_df_time_point['Area (microm2)'])}
area_df_time_point

total_df['norm_factor'] = total_df.split_time.map(mean_df_time_point_dict)
total_df['perc_area_norm'] = total_df['perc_area'] / total_df['norm_factor']

total_df['mean_area_tp'] = total_df.line_split.map(area_df_time_point)
total_df['area_error'] = (total_df['mean_area_tp'] - total_df['Area (microm2)']) / total_df['Area (microm2)']
total_df['std'] = np.sqrt((total_df['Area (microm2)'] - total_df['mean_area_tp'])**2 / len(total_df))
total_df['cv'] = total_df['std'] / total_df['mean_area_tp']
In [9]:
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
In [10]:
sns.kdeplot(total_df['cv'])
Out[10]:
<AxesSubplot:xlabel='cv', ylabel='Density'>
In [11]:
ax = sns.kdeplot(total_df['area_error'])
ax.axvline(15)
Out[11]:
<matplotlib.lines.Line2D at 0x7fe4898dcf10>
In [12]:
ax = sns.kdeplot(total_df['area_error'])
ax.set_xlim(-1, 30)
ax.axvline(10)
Out[12]:
<matplotlib.lines.Line2D at 0x7fe4877fc3a0>
In [13]:
fig, ax = plt.subplots(figsize = (20, 10))
sns.scatterplot(data = total_df, y = 'perc_area', x = 'split_time', ax = ax, hue = 'line', palette=line_palette)
Out[13]:
<AxesSubplot:xlabel='split_time', ylabel='perc_area'>
In [14]:
total_df = total_df[~((total_df['split_time'] < 25) & (total_df['perc_area'] > 10))]
In [15]:
total_df = total_df[total_df['area_error'] < 5]
total_df.shape
Out[15]:
(2170, 25)
In [16]:
idx_max = total_df.groupby('line')['n_split'].idxmax()

# Filter the DataFrame using these indices
filtered_df = total_df.drop(idx_max)
filtered_df
Out[16]:
total_area perc_area mean_area_per_colony n_colonies time_point confluency/generation hour month day line ... split_time pixel_size Area (microm2) Area (mm2) line_split mean_area_tp area_error std cv line_n_split
CTL04E_3_14_11_1.czi 2532723.0 26.606326 1.266362e+06 2.0 14_11_23_t9 generation 9 11 14 CTL04E ... 159.0 1.38 4.823318e+06 9.185526 CTL04E_159.0 4.827011e+06 0.000766 70.092418 0.000015 CTL04E_3
CTL02A_3_14_11_3.czi 1051145.0 11.042308 1.313931e+05 8.0 14_11_23_t9 generation 9 11 14 CTL02A ... 44.0 1.38 2.001801e+06 3.812229 CTL02A_44.0 2.137034e+06 0.067556 2566.701082 0.001201 CTL02A_3
CHD8WT_3_14_11_1.czi 1655866.0 17.394919 1.655866e+05 10.0 14_11_23_t9 generation 9 11 14 H9 ... 44.0 1.38 3.153431e+06 6.005394 H9_44.0 1.326935e+06 -0.579209 34666.426244 0.026125 H9_3
KTD8.2_3_14_11_4.czi 862216.0 9.057603 2.874053e+05 3.0 14_11_23_t9 generation 9 11 14 KTD8.2 ... 72.0 1.38 1.642004e+06 3.127033 KTD8.2_72.0 9.805881e+05 -0.402810 12553.504415 0.012802 KTD8.2_3
CTL09A_4_14_11_5.czi 165602.0 1.739654 2.760033e+04 6.0 14_11_23_t9 generation 9 11 14 CTL09A ... 24.0 1.38 3.153724e+05 0.600595 CTL09A_24.0 3.981301e+05 0.262413 1570.719336 0.003945 CTL09A_4
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
CTL08A_5_22_11_2.czi 946317.0 9.941087 6.759407e+04 14.0 22_11_23_t10 generation 10 11 22 CTL08A ... 17.0 1.38 1.802166e+06 3.432045 CTL08A_17.0 2.480685e+06 0.376502 12878.108361 0.005191 CTL08A_5
CTL08A_5_22_11_3.czi 638843.0 6.711064 1.064738e+05 6.0 22_11_23_t10 generation 10 11 22 CTL08A ... 17.0 1.38 1.216613e+06 2.316917 CTL08A_17.0 2.480685e+06 1.039010 23991.761537 0.009671 CTL08A_5
CTL01A_5_22_11_4.czi 834622.0 8.767728 5.961586e+04 14.0 22_11_23_t10 generation 10 11 22 CTL01A ... 17.0 1.38 1.589454e+06 3.026956 CTL01A_17.0 1.876457e+06 0.180567 5447.242123 0.002903 CTL01A_5
CTL08A_5_22_11_4.czi 857634.0 9.009469 1.072042e+05 8.0 22_11_23_t10 generation 10 11 22 CTL08A ... 17.0 1.38 1.633278e+06 3.110415 CTL08A_17.0 2.480685e+06 0.518838 16083.556967 0.006484 CTL08A_5
CHD8WT_5_22_11_1.czi 245495.0 2.578932 1.227475e+05 2.0 22_11_23_t10 generation 10 11 22 H9 ... 17.0 1.38 4.675207e+05 0.890346 H9_17.0 2.381361e+06 4.093595 36324.195795 0.015254 H9_5

2151 rows × 25 columns

In [17]:
total_df['logArea'] = np.log10(total_df['Area (microm2)'] + 0.000001)

Growth curves total area - by line¶

Here we fitted a polynomial regression function of order 3 (exploratory to look at what type of shapes we expect from the curves):

In [18]:
order = total_df.line.unique().tolist()
order.sort()
In [19]:
sns.set_theme(style="ticks")

# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(total_df.sort_values(by = 'split_time'), col="line", hue = 'line', palette=line_palette,
                     col_wrap=4, height=5, col_order = order)


# Draw a line plot to show the trajectory of each random walk
grid.map(sns.regplot, "split_time", "Area (mm2)", order = 3)

grid.set_axis_labels("Time point post split", "Area (mm2)")

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)

Without fitting any regression (line goes through the mean and the highligthed data around is the standard deviation):

In [20]:
sns.set_theme(style="ticks")

# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(total_df.sort_values(by = 'split_time'), col="line", hue = 'line', palette=line_palette,
                     col_wrap=4, height=5, col_order = order)

# Draw a line plot to show the trajectory of each random walk
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers = True)

grid.set_axis_labels("Time point post split", "Total Area (pixels)")

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)

We use the area then average all the FOV for a specific time points in each line. The plot is composed by:

  • a solid blue line, that is the smoothed version of this growth curve (using the function gaussian_filter1d from numpy),
  • the red around that it is the standard deviation
  • the dotted grey line that is the original signal
In [21]:
fig, ax = plt.subplots(4, 3, figsize=(30, 21), gridspec_kw={'hspace': 0.7})
ax = ax.flatten()

for ax_index, line in enumerate(total_df.line.unique()):
    sub = total_df[(total_df.line == line) & (total_df.n_split != 'day')].sort_values(by='datetime')
    mean_st = sub.groupby('split_time')['Area (mm2)'].mean()
    std_st = sub.groupby('split_time')['Area (mm2)'].std()
    y_pos = mean_st.index
    ydata = sub['Area (microm2)'].values
    xdata = sub.split_time.values.astype('int')
    farray = mean_st.values

    # Smoothing
    farray_smooth = gaussian_filter1d(farray, sigma=3)

    # Error formatting
    upper_err = gaussian_filter1d(farray + (std_st / 2).values, sigma=3)
    lower_err = gaussian_filter1d(farray - (std_st / 2).values, sigma=3)

    ax[ax_index].scatter(xdata, ydata)
    ax[ax_index].plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
    ax[ax_index].plot(y_pos, farray_smooth, color='#2374AB')
    ax[ax_index].fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)

    # Use meaningful limits for better visualization
    ax[ax_index].set_ylim(0, np.max(farray) * 1.25)

    # Rotate x-axis labels for better readability
    ax[ax_index].tick_params(axis='x', rotation=90)

    # Set axis labels and title
    ax[ax_index].set_title(f'Line {line}')
    ax[ax_index].set_ylabel('Total area')
    ax[ax_index].set_xlabel('Time point after split')

plt.tight_layout()  # Adjust layout to prevent overlapping
plt.show()

Growth curves total area - by line and split¶

Here we fitted a polynomial regression function of order 3:

In [22]:
sns.set_theme(style="ticks")

split_palette = {'1': '#264653', '2': '#2a9d8f', '3': '#8ab17d', '4': '#e9c46a', '5': '#f4a261', '6': '#e76f51'}
line_split_palette = {}

for i in total_df.line_n_split.unique():
    split_n = i.split('_')[-1]
    line_split_palette[i] = split_palette[split_n]

custom_handles = [Line2D([0], [0], color=color, lw=2) for color in split_palette.values()]
grid = sns.FacetGrid(total_df.sort_values(by = 'split_time'), col="line", hue = 'line_n_split', palette=line_split_palette,
                     col_wrap=4, height=5, col_order = order)
grid.map(sns.lineplot, "split_time", "Area (mm2)", markers = True)

grid.add_legend()
if grid._legend:
    grid._legend.remove()  

legend = grid.fig.legend(custom_handles, split_palette.keys(), ncol=2, frameon=False, bbox_to_anchor = (1.2,1), fontsize = 25)

legend.set_title('Passage number', prop={'size': 30})

for ax in grid.axes.flat:
    ax.set_title(ax.get_title(), fontsize=35)

# Increase the size of x and y tick labels
for ax in grid.axes.flat:
    ax.tick_params(axis='x', labelsize=20)
    ax.tick_params(axis='y', labelsize=20)

grid.set_axis_labels("Hours after splitting", "Total area (mm2)", fontsize = 25)
grid.fig.tight_layout(w_pad=1)
grid.fig.savefig('./figures/raw_GC_iPSC_dividedSplit.svg', dpi = 300, bbox_inches = 'tight')

I need to filter out the combination of "line" - "number of split" that do not have enough data point to fit an order 3 polynomial regression:

In [23]:
sns.set_theme(style="ticks")

boolean_sel = pd.Series(total_df.groupby(['line_n_split'])['split_time'].count() > 5)
boolean_sel = boolean_sel[boolean_sel]
filtered_total = total_df[total_df.line_n_split.isin(boolean_sel.index)]

# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(filtered_total.sort_values(by = 'split_time'), col="line_n_split", hue = 'line', palette=line_palette,
                     col_wrap=4, height=5, col_order = order)

grid.map(sns.regplot, "split_time", "Area (microm2)", order = 3)

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)

We use the area then average all the FOV for a specific time points in each line at each split. The plot is composed by:

  • a solid blue line, that is the smoothed version of this growth curve (using the function gaussian_filter1d from numpy),
  • the red around that it is the standard deviation
  • the dotted grey line that is the original signal
In [24]:
fig, ax = plt.subplots(9,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0
summary_dfs_dict = {}

for l in order:

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    splits = sub.n_split.unique().tolist()
    splits.sort()

    for split in splits:

        subsub = sub[sub.n_split == split]

        if len(subsub.split_time.unique()) > 5:
            
            ydata = subsub['Area (microm2)'].values
            xdata = subsub.split_time.values.astype('int')
    
            
            # user defined function,
            # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
            summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')

            summary_dfs_dict[f'{l}_split_{split}'] = summary_df
            
            farray = np.array(summary_df['mean'])
    
            y_pos = summary_df.split_time.values
    
            # Smoothing
            farray_smooth = gaussian_filter1d(farray, sigma=3)
    
            # Error formatting
            upper_err = gaussian_filter1d(farray + (summary_df['stds'] / 2), sigma=3)
            lower_err = gaussian_filter1d(farray - (summary_df['stds'] / 2), sigma=3)
    
            ax[ax_index].scatter(xdata, ydata)
            ax[ax_index].plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
            ax[ax_index].plot(y_pos, farray_smooth, color = '#2374AB')
            ax[ax_index].fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)
            #ax[ax_index].errorbar(y_pos, farray, yerr=summary_df['stds'], fmt='none', color='crimson', alpha=0.5)
            #
            ax[ax_index].set_ylim(0, np.max(farray)+(np.max((farray)*25)/100))
            ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].set_title(f'{l}_split_{split}')
            ax[ax_index].set_ylabel('Total area')
            ax[ax_index].set_xlabel('Time point')
            ax_index += 1
            
        else:
            print(f'Skipped split {subsub.n_split.values[0]} of line {subsub.line.values[0]}')

plt.show()
Skipped split 3 of line CTL01A
Skipped split 1 of line CTL04E
Skipped split 1 of line CTL09A
Skipped split 1 of line UCSFi001-A

Growth curves log total area¶

We use the logarithm of the area then sum all the FOV for a specific time points in each line at each split. The plot is composed by:

  • a solid blue line, that is the smoothed version of this growth curve (using the function gaussian_filter1d from numpy),
  • the red around that it is the standard deviation
  • the dotted grey line that is the original signal
In [25]:
fig, ax = plt.subplots(10,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0

for l in order:

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    splits = sub.n_split.unique().tolist()
    splits.sort()

    for split in splits:

        subsub = sub[sub.n_split == split]

        
        # user defined function,
        # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
        summary_df = preprocess(subsub, original_v='logArea', final_output='area_sum')
        
        farray = np.array(summary_df['area_sum'])

        y_pos = summary_df.datetime.values

        # Smoothing
        farray_smooth = gaussian_filter1d(farray, sigma=1)

        # Error formatting
        upper_err = gaussian_filter1d(farray + (summary_df['stds'] / 2), sigma=1)
        lower_err = gaussian_filter1d(farray - (summary_df['stds'] / 2), sigma=1)

        ax[ax_index].plot(y_pos, farray, '--', linewidth=0.7, color='k', alpha=0.45)
        ax[ax_index].plot(y_pos, farray_smooth, color = '#2374AB')
        ax[ax_index].fill_between(y_pos, upper_err, lower_err, color='crimson', alpha=0.2)
        #
        ax[ax_index].set_ylim(0, np.max(farray)+(np.max((farray)*25)/100))
        ax[ax_index].xaxis.set_tick_params(rotation=90)
        ax[ax_index].set_title(f'{l}_split_{split}')
        ax[ax_index].set_ylabel('Log total area')
        ax[ax_index].set_xlabel('Time point')
        ax_index += 1

plt.show()
In [26]:
lines = total_df.line.unique()

Discrete derivative of the curves - per line per split¶

We use the total area and then sum all the FOV for a specific time points in each line at each split. Then we smoothed it with gaussian_filter1d and computed the diff, corresponding to the first discrete derivative.

In [27]:
discrete_deriv_curves = {}

fig, ax = plt.subplots(10,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0

for l in total_df.line.unique():
    
    #color = color_dict[l]

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    for split in sub.n_split.unique():

        subsub = sub[sub.n_split == split]

        # user defined function,
        # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        
        summary_df = summary_df.sort_values('split_time')
        
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        summary_df['derivative'] = summary_df['smoothed'].diff() / summary_df['split_time'].diff()

        discrete_deriv_curves[f'{l}_split_{split}'] = summary_df
        
        farray = np.array(summary_df['derivative'])
        
        y_pos = summary_df.split_time.values
        
        ax[ax_index].errorbar(y_pos, farray, marker = 'o')
        
        ax[ax_index]

        ax[ax_index].xaxis.set_tick_params(rotation=90)
        ax[ax_index].set_title(f'{l}_split_{split}')
        ax[ax_index].set_ylabel('Discrete derivative')
        ax[ax_index].set_xlabel('Time point')
        ax_index += 1
        

plt.show()

Discrete derivative of the curves - per line¶

We collected the results for each line and each split and we can use it to converge to a single result for each line using as replicates the splits.

In [28]:
deriv_df = pd.concat(discrete_deriv_curves.values(), keys = discrete_deriv_curves.keys()).reset_index()
deriv_df['line'] = deriv_df['level_0'].apply(lambda x: x.split('_')[0])
deriv_df['split'] = deriv_df['level_0'].apply(lambda x: x.split('_')[-1])
In [29]:
fig, ax = plt.subplots(4,3, figsize = (5*4, 7*3), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0

for line in deriv_df.line.unique():
    
    #color = color_dict[l]

    sub = deriv_df[deriv_df.line == line]
    
    sub = sub.sort_values('split_time')

    sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma = 3)

    sns.lineplot(data = sub, y = 'smoothed', x = 'split_time', hue = 'split', ax = ax[ax_index], markers = True, palette=split_palette)
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
In [30]:
fig, ax = plt.subplots(4,3, figsize = (5*4, 7*3)) #, gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0

for line in deriv_df.line.unique():
    
    #color = color_dict[l]

    sub = deriv_df[deriv_df.line == line]
    
    sub = sub.sort_values('split_time')

    sub['smoothed'] = gaussian_filter1d(sub['derivative'], sigma = 3)

    sns.lineplot(data = sub, y = 'smoothed', x = 'split_time', ax = ax[ax_index], markers = True, err_style = 'bars')
    ax[ax_index].xaxis.set_tick_params(rotation=90)
    ax[ax_index].set_title(f'{line}')
    ax[ax_index].set_ylabel('Discrete derivative')
    ax[ax_index].set_xlabel('Time point')
    ax_index += 1
    
plt.tight_layout()

Cumulative of the areas - per line per split¶

We use the total area and then average all the FOV for a specific time points in each line at each split. Then we smoothed it with gaussian_filter1d and computed the cumulative sum over the discrete differential of the growth. with the cumsum() function.

In [31]:
fig, ax = plt.subplots(9,5, figsize = (5*10, 7*9))
ax = ax.flatten().T
ax_index = 0

order_line_n = total_df.line_n_split.unique().tolist()
order_line_n.sort()

for line_n in order_line_n:
    
    sub = total_df[total_df.line_n_split == line_n]
    #sub = sub[~sub.index.duplicated()]
    sub = sub.sort_values('split_time')

    if len(sub.split_time.unique()) > 5:
        
        #sub['smoothed'] = sub.groupby('line_n_split')['Area (microm2)'].apply(gaussian_filter1d, sigma = 3).loc[line_n]
        y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
        x = sub.split_time.unique()
        
        sns.lineplot(y = y, x = x, ax = ax[ax_index], markers = True, err_style='bars')
        ax[ax_index].xaxis.set_tick_params(rotation=90)
        ax[ax_index].set_title(f'{line_n}')
        ax[ax_index].set_ylabel('Cumulative of mean total area')
        ax[ax_index].set_xlabel('Time point')
        ax_index += 1

plt.tight_layout()
In [32]:
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}

total_df_no_first = total_df[total_df.n_split != '1'].copy()

for l in total_df.line.unique():
    
    color = line_palette[l]

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    for split in sub.n_split.unique():

        subsub = sub[sub.n_split == split]

        # user defined function,
        # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
        summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
        
        summary_df = summary_df.sort_values('split_time')
        
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 1)
        summary_df['cumulative'] = summary_df['smoothed'].cumsum()
        
        farray = np.array(summary_df['cumulative'])
        
        y_pos = summary_df.split_time.values

        line, = ax.plot(y_pos, farray, color = color, marker = '.')
        ax.xaxis.set_tick_params(rotation=90)
        all_lines[f'{l}_{split}'] = line

    labels.append(l)
    handles.append(line)
    lc.append(color)
    
plt.legend(handles, labels, bbox_to_anchor = (1,1))
Out[32]:
<matplotlib.legend.Legend at 0x7fe4843eddc0>
In [33]:
lines = total_df.line.unique()
In [34]:
highlight_growth_curves(all_lines, xlabel = 'Hours from split', ylabel = 'Cumulative growth', lines = lines, fontsize = 20)
#plt.savefig('growth_curve_per_line.pdf', dpi = 300)
In [35]:
len(np.float64(list(range(0,200))))
Out[35]:
200
In [36]:
fig, ax = plt.subplots(5,9, figsize = (5*12, 8*5))
ax = ax.flatten().T
ax_index = 0

fitted_param = {}

def exp_model(t, a, b, c):
    return a * np.exp(b * t) + c - a

for line_n in order_line_n:

    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    sub = sub.sort_values('split_time')
    
    if len(sub.split_time.unique()) > 5:
        # Calculate the cumulative sum of the mean total area for each split_time
        y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
        y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()

        #print(y_min_0)
            
        
        x = np.array(sub.split_time.unique())
        hue = [line_n.split('_')[0]]*len(y)

        try:
            #print(min(y))
            popt, pcov = curve_fit(exp_model, x, y, p0=(min(y), 0.1, y_min_0), bounds=((-np.inf, 0, y_min_0), (np.inf,np.inf, np.inf)))

            fitted_param[line_n]['a'] = popt[0]
            fitted_param[line_n]['rate'] = popt[1]
            fitted_param[line_n]['min(y)'] = min(y)
            fitted_param[line_n]['c'] = popt[2]
            fitted_param[line_n]['mean_cum'] = y
            fitted_param[line_n]['split_time'] = x
            
            a, b, c = popt
            
            # Generate fitted y values
            y_fitted = exp_model(x, a, b, c)
            y_fitted_viz = exp_model(np.float64(list(range(0, 200))), a, b, c)

            fitted_param[line_n]['y_fitted'] = y_fitted
            fitted_param[line_n]['MSLE'] = mean_squared_log_error(y, y_fitted)
            fitted_param[line_n]['r2'] = r2_score(y, y_fitted)
            
            sns.lineplot(y = y_fitted_viz, x = np.float64(list(range(0, 200))), ax = ax[ax_index]) #, markers = True, hue = hue, palette=line_palette, linewidth = 5, legend = None)
            sns.scatterplot(y = y, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, s = 150, legend=None)
            #ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].tick_params(axis='both', which='major', labelsize=30)  # Adjust major ticks
            ax[ax_index].tick_params(axis='both', which='minor', labelsize=20)  # Adjust minor ticks
            ax[ax_index].set_title(f'{line_n}', fontsize = 40)
            ax[ax_index].set_ylabel('Cumulative area (mm2)', fontsize = 35)
            ax[ax_index].set_xlabel('Time point', fontsize = 35)
            ax[ax_index].set_xlim(-10, 220)
            ax[ax_index].set_ylim(-10, 250)
            #ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize = 30)
            #ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize = 30)
            ax_index += 1
        
        except RuntimeError as e:
            print(f"Fitting failed for line_n {line_n}: {e}")
            
plt.tight_layout()
plt.savefig('./figures/Fitted_cum_area_per_split_new_viz.svg', dpi = 300, bbox_inches = 'tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
In [37]:
fig, ax = plt.subplots(5,9, figsize = (5*12, 8*5))
ax = ax.flatten().T
ax_index = 0

fitted_param = {}

def exp_model(t, a, b, c):
    return a * np.exp(b * t) + c - a

for line_n in order_line_n:

    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    sub = sub.sort_values('split_time')
    
    if len(sub.split_time.unique()) > 5:
        # Calculate the cumulative sum of the mean total area for each split_time
        y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
        y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()

        #print(y_min_0)
            
        
        x = np.array(sub.split_time.unique())
        hue = [line_n.split('_')[0]]*len(y)

        try:
            #print(min(y))
            popt, pcov = curve_fit(exp_model, x, y, p0=(min(y), 0.1, y_min_0), bounds=((-np.inf, 0, y_min_0), (np.inf,np.inf, np.inf)))

            fitted_param[line_n]['a'] = popt[0]
            fitted_param[line_n]['rate'] = popt[1]
            fitted_param[line_n]['min(y)'] = min(y)
            fitted_param[line_n]['c'] = popt[2]
            fitted_param[line_n]['mean_cum'] = y
            fitted_param[line_n]['split_time'] = x
            
            a, b, c = popt
            
            # Generate fitted y values
            y_fitted = exp_model(x, a, b, c)
            y_fitted_viz = exp_model(np.float64(list(range(0, 200))), a, b, c)

            fitted_param[line_n]['y_fitted'] = y_fitted
            #fitted_param[line_n]['MSLE'] = mean_squared_log_error(y, y_fitted)
            fitted_param[line_n]['r2'] = r2_score(y, y_fitted)
            
            sns.lineplot(y = y_fitted, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, linewidth = 5, legend = None)
            sns.scatterplot(y = y, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, s = 150, legend=None)
            ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].set_title(f'{line_n}', fontsize = 40)
            ax[ax_index].set_ylabel('Cumulative area (mm2)', fontsize = 35)
            ax[ax_index].set_xlabel('Time point', fontsize = 35)
            #ax[ax_index].set_xlim(-10, 220)
            #ax[ax_index].set_ylim(-10, 85000)
            _ = ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize = 30)
            _ = ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize = 30)
            ax_index += 1
        
        except RuntimeError as e:
            print(f"Fitting failed for line_n {line_n}: {e}")
            
plt.tight_layout()
#plt.savefig('./figures/Fitted_cum_area_per_split_orig_viz.svg', dpi = 300, bbox_inches = 'tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
In [38]:
donor_map_names = {i:j for i, j in zip(total_df['line'], total_df['line'])}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
donor_map_names['CTL01A'] = 'CTL01'
total_df['line'] = total_df['line'].map(donor_map_names)
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
order_line_n = total_df.line_n_split.unique().tolist()
order_line_n.sort()

line_palette = {'CTL01': '#DBB807',
 'CTL08A': '#0FB248',
 'CTL04E': '#FF0054',
 'CTL02A': '#7B00FF',
 'H9': '#72190E',
 'H1': '#994F88',
 'CTL05A': '#1965B0',
 'CTL07C': '#437DBF',
 'CTL06F': '#CAE0AB',
 'CTL09A': '#FFFF00',
 'KTD8.2': '#E65518',
 'UCSFi001-A': '#7BAFDE'}

fig, ax = plt.subplots(5,9, figsize = (5*12, 8*5))
ax = ax.flatten().T
ax_index = 0

fitted_param = {}

def exp_model(t, a, b, c, d):
    return a * np.exp(b * (t + d)) + c - a

for line_n in order_line_n:

    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    sub = sub.sort_values('split_time')
    
    if len(sub.split_time.unique()) > 5:
        # Calculate the cumulative sum of the mean total area for each split_time
        y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
        y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()

        #print(y_min_0)
            
        
        x = np.array(sub.split_time.unique())
        hue = [line_n.split('_')[0]]*len(y)

        try:
            #print(min(y))
            popt, pcov = curve_fit(exp_model, x, y, p0=(min(y), 0.1, y_min_0, 0), bounds=((-np.inf, 0, y_min_0, 0), (np.inf,np.inf, np.inf, np.inf)))

            fitted_param[line_n]['a'] = popt[0]
            fitted_param[line_n]['rate'] = popt[1]
            fitted_param[line_n]['min(y)'] = min(y)
            fitted_param[line_n]['c'] = popt[2]
            fitted_param[line_n]['d'] = popt[3]
            fitted_param[line_n]['mean_cum'] = y
            fitted_param[line_n]['split_time'] = x
            
            a, b, c, d = popt
            
            # Generate fitted y values
            y_fitted = exp_model(x, a, b, c, d)
            y_fitted_viz = exp_model(np.float64(list(range(0, 200))), a, b, c, d)

            fitted_param[line_n]['y_fitted'] = y_fitted
            #fitted_param[line_n]['MSLE'] = mean_squared_log_error(y, y_fitted)
            fitted_param[line_n]['r2'] = r2_score(y, y_fitted)
            
            sns.lineplot(y = y_fitted_viz, x = np.float64(list(range(0, 200))), ax = ax[ax_index]) #, markers = True, hue = hue, palette=line_palette, linewidth = 5, legend = None)
            sns.scatterplot(y = y, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, s = 150, legend=None)
            ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].set_title(f'{line_n}', fontsize = 40)
            ax[ax_index].set_ylabel('Cumulative area (mm2)', fontsize = 35)
            ax[ax_index].set_xlabel('Time point', fontsize = 35)
            ax[ax_index].set_xlim(-10, 220)
            ax[ax_index].set_ylim(-10, 250)
            _ = ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize = 30)
            _ = ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize = 30)
            ax_index += 1
        
        except RuntimeError as e:
            print(f"Fitting failed for line_n {line_n}: {e}")
            
plt.tight_layout()
#plt.savefig('./figures/Fitted_cum_area_per_split_new_viz_new_name.svg', dpi = 300, bbox_inches = 'tight')
In [39]:
fitted_df = pd.DataFrame.from_dict(fitted_param).T
fitted_df['Line'] = fitted_df.reset_index()['index'].apply(lambda x: x.split('_')[0]).values
fitted_df['split'] = fitted_df.reset_index()['index'].apply(lambda x: x.split('_')[1]).values
fitted_df = fitted_df[~fitted_df.a.isna()]
fitted_df['a+c'] = fitted_df['a'] + fitted_df['c']
fitted_df
Out[39]:
a rate min(y) c d mean_cum split_time y_fitted r2 Line split a+c
CTL01_2 0.140842 0.031439 0.128738 0.051989 0.0 [0.12873836706191996, 0.5126940096263999, 0.75... [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] [0.5684554397560638, 0.7564249822784911, 1.265... 0.955038 CTL01 2 0.192832
CTL01_4 6.709435 0.020925 1.071547 0.93823 0.0 [1.0715473444571997, 1.7103960320254554, 3.865... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] [0.938230218953283, 2.160852939749881, 5.54949... 0.958803 CTL01 4 7.647666
CTL01_5 186.079023 0.000661 1.015545 1.016249 1.889604 [1.0155450486297597, 1.5641046091350714, 4.591... [0.0, 16.0, 17.0, 24.0, 39.0, 48.0, 63.0] [1.2487270333315053, 3.228882692965442, 3.3533... 0.886366 CTL01 5 187.095273
CTL02A_1 1.35829 0.02225 1.065226 0.550477 0.0 [1.0652259377527198, 1.4048785412085596, 2.313... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.4269678603212426, 3.1441310003253333, 4.709... 0.974893 CTL02A 1 1.908767
CTL02A_2 0.954612 0.043378 1.26411 0.76608 0.0 [1.2641101587982075, 1.7939214715189675, 4.596... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] [1.6412790235890737, 2.5151392616598383, 4.993... 0.992677 CTL02A 2 1.720692
CTL02A_3 16.613952 0.016347 1.649741 0.656578 0.0 [1.6497408713817594, 2.607119548807967, 3.9639... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.6565776408130546, 2.0715333228559345, 7.081... 0.975446 CTL02A 3 17.27053
CTL02A_5 0.682361 0.037966 0.654197 0.588409 0.0 [0.6541966858658399, 0.9161832703838398, 1.754... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.58840944724512, 1.158686195549207, 1.603241... 0.983085 CTL02A 5 1.27077
CTL04E_2 0.821181 0.024744 0.299609 0.299609 0.0 [0.2996085652689599, 1.1036059070299196, 1.703... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9655543682098432, 1.3365054988339438, 2.239... 0.985605 CTL04E 2 1.120789
CTL04E_3 1.306327 0.023063 2.123347 2.123347 0.0 [2.1233470930991993, 2.4315673509784794, 2.677... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [2.66327926587685, 3.089170751678419, 4.028297... 0.976291 CTL04E 3 3.429674
CTL04E_4 9.612778 0.017852 1.218989 0.859733 0.0 [1.2189894108551995, 2.1238454675329193, 2.880... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.8597330722492273, 2.3354356093880746, 6.266... 0.990038 CTL04E 4 10.472511
CTL04E_5 2.962125 0.037622 1.656866 0.84359 0.0 [1.6568662053110392, 2.247560498352959, 2.5440... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.8435904553541462, 3.2893968997575933, 5.188... 0.835627 CTL04E 5 3.805716
CTL05A_1 0.831961 0.030813 0.779193 0.497694 0.0 [0.7791926205821758, 2.426386553802719, 4.5256... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.432703584024785, 3.3169982345469524, 5.4623... 0.99813 CTL05A 1 1.329655
CTL05A_2 0.055171 0.043012 0.205236 0.490657 0.192915 [0.20523627369755992, 0.5079725389905838, 0.63... [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... [0.5174144971958101, 0.5916686668561622, 0.665... 0.999672 CTL05A 2 0.545828
CTL05A_3 2.543078 0.01896 1.430206 0.870664 0.0 [1.4302055733008394, 1.6829849546056073, 1.766... [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... [0.8706640646764798, 1.7072361489421515, 2.260... 0.984739 CTL05A 3 3.413742
CTL06F_1 0.192261 0.038794 0.186632 0.149139 0.0 [0.18663200746559994, 0.5037948979217999, 0.99... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... [0.14913877596192965, 0.8297827985903414, 1.19... 0.998507 CTL06F 1 0.341399
CTL06F_2 0.178737 0.048632 0.221248 0.603456 0.025076 [0.22124802574367994, 0.5315105005565759, 0.98... [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... [0.6036744948519198, 0.7019439338747042, 0.999... 0.998732 CTL06F 2 0.782194
CTL06F_3 9.10152 0.017721 0.835248 0.584532 0.0 [0.8352482294782076, 1.4114732831694714, 2.609... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.5845324628693156, 1.4277887490358108, 4.455... 0.9908 CTL06F 3 9.686052
CTL06F_5 1.440098 0.028446 0.995236 1.003616 1.829151 [0.9952359126703195, 2.065253135791535, 2.6851... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.0805315559824449, 1.9549254566209664, 2.566... 0.996178 CTL06F 5 2.443714
CTL07C_2 0.247166 0.031126 0.098445 0.039209 0.0 [0.09844493853571196, 0.27541168560691187, 0.4... [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... [0.11911823212082312, 0.31373406544445276, 0.4... 0.990575 CTL07C 2 0.286375
CTL07C_3 1.574502 0.035564 2.400426 2.022206 0.689164 [2.400426353463839, 3.2856292908866864, 4.3438... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [2.061272905633547, 3.198562502772172, 4.23628... 0.998319 CTL07C 3 3.596708
CTL07C_4 6.062433 0.019101 0.478994 0.409045 0.0 [0.47899434749327985, 0.8008914021730558, 1.50... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.409045425456962, 1.016575988537614, 3.22947... 0.978949 CTL07C 4 6.471478
CTL07C_6 2.86121 0.030355 0.717828 0.424542 0.0 [0.7178284323935998, 1.3666883712911995, 2.714... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.4245424828464115, 2.2135806392001545, 3.491... 0.982106 CTL07C 6 3.285752
CTL08A_1 1.187041 0.033521 0.33419 0.103732 0.0 [0.33418952506655986, 1.1351766731587196, 3.25... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.10373199917488773, 3.3042502170853707, 4.84... 0.980946 CTL08A 1 1.290773
CTL08A_2 0.706487 0.033213 0.165209 0.120179 0.0 [0.16520885806607996, 0.42682587784329584, 0.6... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [0.12017926217232122, 0.5763852017201965, 0.98... 0.996042 CTL08A 2 0.826666
CTL08A_3 4.495583 0.021725 0.369839 0.164502 0.0 [0.3698385596056799, 0.6810055432149598, 1.573... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.1645016438908815, 0.6803478200863058, 2.610... 0.973682 CTL08A 3 4.660084
CTL08A_5 29.785332 0.003436 0.718488 0.314723 3.941521 [0.7184884989571197, 0.8260278491119677, 3.779... [0.0, 16.0, 17.0, 24.0, 39.0, 48.0, 63.0] [0.7208531458460676, 2.4271380041338624, 2.536... 0.886176 CTL08A 5 30.100055
CTL09A_2 0.263506 0.036516 0.416052 0.326098 0.0 [0.41605228590047993, 0.8020691253345598, 1.64... [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... [0.9418803460915361, 1.6397387847215783, 2.174... 0.9842 CTL09A 2 0.589604
CTL09A_3 2.677791 0.028298 1.09468 0.4693 0.0 [1.0946797761170877, 2.2220952270454073, 3.557... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [1.8852783759737175, 3.0726940265447356, 5.865... 0.98176 CTL09A 3 3.147091
CTL09A_4 3.743789 0.024477 0.856375 0.856375 0.0 [0.8563747115980797, 1.4846659608893753, 2.637... [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... [1.7789881436924095, 3.8490741088358766, 5.306... 0.990837 CTL09A 4 4.600164
CTL09A_5 0.63178 0.044788 0.999207 1.358836 0.463645 [0.9992065878129598, 1.4917047366392635, 2.493... [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] [1.3720928133032406, 1.650041365050099, 2.6168... 0.98726 CTL09A 5 1.990617
H1_1 0.881927 0.022767 0.127647 0.032898 0.0 [0.12764744386243193, 0.2877317192128319, 0.33... [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... [0.23345521211899467, 0.6740987835864768, 1.02... 0.98154 H1 1 0.914825
H1_2 4.602872 0.013332 2.472889 2.472889 0.0 [2.4728886058766393, 2.915316716446655, 3.1268... [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... [2.4728886059932584, 3.4919010775579657, 5.611... 0.957078 H1 2 7.07576
H9_1 4.27921 0.024254 0.377707 0.350742 0.0 [0.3777067706471999, 0.9652240400097598, 2.490... [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... [0.35074196362019805, 2.228442590313313, 3.730... 0.986148 H9 1 4.629952
H9_2 0.659132 0.045415 0.301872 0.153647 0.0 [0.30187165062959986, 1.850863636851551, 3.155... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [0.7971585634024018, 1.4548830572822027, 3.368... 0.999084 H9 2 0.812778
H9_3 11.484073 0.015581 0.90592 0.3021 0.0 [0.9059195979950396, 1.1298801829973755, 1.425... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.30210013531553237, 1.232548066656026, 4.501... 0.975495 H9 3 11.786173
H9_5 1.240493 0.023808 0.809685 0.730269 0.791928 [0.8096849757622797, 0.9034534152302397, 1.793... [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] [0.7538788914698014, 1.3399635667979695, 1.384... 0.965619 H9 5 1.970761
KTD8.2_1 0.238791 0.041092 0.550485 0.550485 0.0 [0.5504846337575998, 0.9347023082408397, 1.524... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.5504846337576003, 1.4975103321043415, 2.028... 0.986486 KTD8.2 1 0.789275
KTD8.2_2 0.367678 0.039801 0.713858 1.743854 0.6572 [0.7138583617075197, 1.1289241745020795, 3.124... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [1.753597797333993, 2.061830036541058, 2.35718... 0.987279 KTD8.2 2 2.111532
KTD8.2_3 2.067122 0.018607 0.107452 0.056472 0.0 [0.10745158306233597, 0.3914869295194559, 0.60... [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... [0.43331855893314675, 1.4698024054174912, 1.80... 0.988663 KTD8.2 3 2.123594
KTD8.2_4 1.686085 0.026211 1.417346 1.344326 0.062617 [1.4173458607295994, 2.1666846135959994, 2.887... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3470955115139647, 2.2270197490582477, 2.826... 0.987125 KTD8.2 4 3.030411
UCSFi001-A_2 0.588666 0.026581 0.470745 0.461125 0.0 [0.4707453288189598, 1.1102174528831996, 1.745... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9865637008725591, 1.2876735624447133, 2.037... 0.970674 UCSFi001-A 2 1.049792
UCSFi001-A_3 1.067307 0.025661 0.823938 0.73609 0.0 [0.8239383636753597, 1.0375904860577996, 1.344... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... [0.7360902739843218, 1.2371895963042165, 1.644... 0.995604 UCSFi001-A 3 1.803397
UCSFi001-A_4 11.891798 0.016863 0.65714 0.65714 0.0 [0.6571397848564797, 1.3643807979277436, 3.360... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.6571397848586606, 2.3746568201900295, 6.892... 0.99402 UCSFi001-A 4 12.548938
UCSFi001-A_5 0.695858 0.055867 1.776417 1.328369 0.0 [1.7764168326609595, 2.3372079343318073, 3.079... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3283694521265619, 2.3335912072530425, 3.292... 0.981117 UCSFi001-A 5 2.024227
In [40]:
donor_map_names = {i:j for i, j in zip(total_df['line'], total_df['line'])}
donor_map_names['CHD2WT'] = 'UCSFi001-A'
donor_map_names['CHD8WT'] = 'H9'
donor_map_names['CTL01'] = 'CTL01A'
total_df['line'] = total_df['line'].map(donor_map_names)
total_df['line_n_split'] = total_df['line'] + '_' + total_df['n_split'].astype('str')
order_line_n = total_df.line_n_split.unique().tolist()
order_line_n.sort()

line_palette = {'CTL01A': '#DBB807',
 'CTL08A': '#0FB248',
 'CTL04E': '#FF0054',
 'CTL02A': '#7B00FF',
 'H9': '#72190E',
 'H1': '#994F88',
 'CTL05A': '#1965B0',
 'CTL07C': '#437DBF',
 'CTL06F': '#CAE0AB',
 'CTL09A': '#FFFF00',
 'KTD8.2': '#E65518',
 'UCSFi001-A': '#7BAFDE'}

fig, ax = plt.subplots(5,9, figsize = (5*12, 8*5))
ax = ax.flatten().T
ax_index = 0

fitted_param = {}

def exp_model(t, a, b, c):
    return a * np.exp(b * t) + c - a

for line_n in order_line_n:

    fitted_param[line_n] = {}
    sub = total_df[total_df.line_n_split == line_n]
    sub = sub.sort_values('split_time')
    
    if len(sub.split_time.unique()) > 5:
        # Calculate the cumulative sum of the mean total area for each split_time
        y = sub.groupby('split_time')['Area (mm2)'].mean().cumsum().values
        y_min_0 = sub[(sub.split_time == sub.split_time.min()) & (sub['Area (mm2)'] > 0)]['Area (mm2)'].min()

        #print(y_min_0)
            
        
        x = np.array(sub.split_time.unique())
        hue = [line_n.split('_')[0]]*len(y)

        try:
            #print(min(y))
            popt, pcov = curve_fit(exp_model, x, y, p0=(min(y), 0.1, y_min_0), bounds=((-np.inf, 0, y_min_0), (np.inf,np.inf, np.inf)))

            fitted_param[line_n]['a'] = popt[0]
            fitted_param[line_n]['rate'] = popt[1]
            fitted_param[line_n]['min(y)'] = min(y)
            fitted_param[line_n]['c'] = popt[2]
            fitted_param[line_n]['mean_cum'] = y
            fitted_param[line_n]['split_time'] = x
            
            a, b, c = popt
            
            # Generate fitted y values
            y_fitted = exp_model(x, a, b, c)
            y_fitted_viz = exp_model(np.float64(list(range(0, 200))), a, b, c)

            fitted_param[line_n]['y_fitted'] = y_fitted
            fitted_param[line_n]['MSLE'] = mean_squared_log_error(y, y_fitted)
            fitted_param[line_n]['r2'] = r2_score(y, y_fitted)
            
            sns.lineplot(y = y_fitted, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, linewidth = 5, legend = None)
            sns.scatterplot(y = y, x = x, ax = ax[ax_index], markers = True, hue = hue, palette=line_palette, s = 150, legend=None)
            ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].set_title(f'{line_n}', fontsize = 40)
            ax[ax_index].set_ylabel('Cumulative area (mm2)', fontsize = 35)
            ax[ax_index].set_xlabel('Time point', fontsize = 35)
            #ax[ax_index].set_xlim(-10, 220)
            #ax[ax_index].set_ylim(-10, 85000)
            _ = ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize = 30)
            _ = ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize = 30)
            ax_index += 1
        
        except RuntimeError as e:
            print(f"Fitting failed for line_n {line_n}: {e}")
            
plt.tight_layout()
#plt.savefig('./figures/Fitted_cum_area_per_split_orig_viz_new_name.svg', dpi = 300, bbox_inches = 'tight')
Fitting failed for line_n CTL01A_5: Optimal parameters not found: The maximum number of function evaluations is exceeded.
In [41]:
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}

total_df_no_first = total_df[total_df.n_split != '1'].copy()

for l in total_df.line.unique():
    
    color = line_palette[l]

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    for split in sub.n_split.unique():


        subsub = sub[sub.n_split == split]

        if len(subsub.split_time.unique()) > 5:

            # user defined function,
            # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
            summary_df = preprocess(subsub, original_v='Area (mm2)', final_output='mean')
            
            summary_df = summary_df.sort_values('split_time')
            summary_df['cumulative'] = summary_df['mean'].cumsum()
            
            farray = np.array(summary_df['cumulative'].values)
            y_pos = summary_df.split_time.values
    
            popt, pcov = curve_fit(exp_model, y_pos, farray, p0=(max(farray), 0.1, 0))
            
            a, b, c = popt
    
            y_fitted = exp_model(y_pos, a, b, c)
    
            line, = ax.plot(y_pos, y_fitted, color = color, marker = '.')
            ax.xaxis.set_tick_params(rotation=90)
            all_lines[f'{l}_{split}'] = line
    
        labels.append(l)
        handles.append(line)
        lc.append(color)
    
plt.legend(handles, labels, bbox_to_anchor = (1,1))
Out[41]:
<matplotlib.legend.Legend at 0x7fe484fef520>
In [42]:
lines = total_df.line.unique()
In [43]:
highlight_growth_curves(all_lines, xlabel = 'Hours from split', ylabel = 'Cumulative area (mm2)', lines = lines, fontsize = 20)
#plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi = 300)
In [44]:
fitted_df = pd.DataFrame.from_dict(fitted_param).T
fitted_df['Line'] = fitted_df.reset_index()['index'].apply(lambda x: x.split('_')[0]).values
fitted_df['split'] = fitted_df.reset_index()['index'].apply(lambda x: x.split('_')[1]).values
fitted_df = fitted_df[~fitted_df.a.isna()]
fitted_df['a+c'] = fitted_df['a'] + fitted_df['c']
In [45]:
fitted_df[fitted_df['a+c'] > 0].shape
Out[45]:
(43, 12)
In [46]:
fitted_df
Out[46]:
a rate min(y) c mean_cum split_time y_fitted MSLE r2 Line split a+c
CTL01A_2 0.140845 0.031439 0.128738 0.051989 [0.12873836706191996, 0.5126940096263999, 0.75... [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] [0.5684601783697514, 0.7564306877808943, 1.265... 0.033141 0.955038 CTL01A 2 0.192835
CTL01A_4 6.709435 0.020925 1.071547 0.93823 [1.0715473444571997, 1.7103960320254554, 3.865... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] [0.9382302189532821, 2.1608528979695825, 5.549... 0.024887 0.958803 CTL01A 4 7.647665
CTL02A_1 1.358287 0.02225 1.065226 0.550477 [1.0652259377527198, 1.4048785412085596, 2.313... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.4269662693898706, 3.144128958826294, 4.7098... 0.10339 0.974893 CTL02A 1 1.908765
CTL02A_2 0.954612 0.043378 1.26411 0.76608 [1.2641101587982075, 1.7939214715189675, 4.596... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] [1.6412790277239497, 2.5151392692655543, 4.993... 0.016467 0.992677 CTL02A 2 1.720692
CTL02A_3 16.613953 0.016347 1.649741 0.656578 [1.6497408713817594, 2.607119548807967, 3.9639... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.6565776402558363, 2.0715333393022206, 7.081... 0.11749 0.975446 CTL02A 3 17.27053
CTL02A_5 0.682361 0.037966 0.654197 0.588409 [0.6541966858658399, 0.9161832703838398, 1.754... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.5884094472451199, 1.1586862058833431, 1.603... 0.007286 0.983085 CTL02A 5 1.27077
CTL04E_2 0.821185 0.024744 0.299609 0.299609 [0.2996085652689599, 1.1036059070299196, 1.703... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9655562584883766, 1.3365081793448954, 2.239... 0.028769 0.985605 CTL04E 2 1.120794
CTL04E_3 1.306325 0.023063 2.123347 2.123347 [2.1233470930991993, 2.4315673509784794, 2.677... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [2.6632786987130626, 3.089169790618284, 4.0282... 0.068529 0.976291 CTL04E 3 3.429672
CTL04E_4 9.612778 0.017852 1.218989 0.859733 [1.2189894108551995, 2.1238454675329193, 2.880... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.8597330722454402, 2.3354356000019507, 6.266... 0.044278 0.990038 CTL04E 4 10.472511
CTL04E_5 2.962251 0.037622 1.656866 0.84359 [1.6568662053110392, 2.247560498352959, 2.5440... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.8435904553543887, 3.289444280042749, 5.1886... 0.200981 0.835627 CTL04E 5 3.805842
CTL05A_1 0.831961 0.030813 0.779193 0.497694 [0.7791926205821758, 2.426386553802719, 4.5256... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.432703578982128, 3.3169982277731034, 5.4623... 0.052295 0.99813 CTL05A 1 1.329655
CTL05A_2 0.055631 0.043012 0.205236 0.491117 [0.20523627369755992, 0.5079725389905838, 0.63... [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... [0.5174145386527128, 0.5916687051661024, 0.665... 0.00577 0.999672 CTL05A 2 0.546748
CTL05A_3 2.543079 0.01896 1.430206 0.870664 [1.4302055733008394, 1.6829849546056073, 1.766... [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... [0.8706640646764798, 1.7072362370661094, 2.260... 0.079341 0.984739 CTL05A 3 3.413743
CTL06F_1 0.192261 0.038794 0.186632 0.149139 [0.18663200746559994, 0.5037948979217999, 0.99... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... [0.14913877596194025, 0.8297829559442185, 1.19... 0.008407 0.998507 CTL06F 1 0.3414
CTL06F_2 0.178955 0.048632 0.221248 0.603675 [0.22124802574367994, 0.5315105005565759, 0.98... [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... [0.6036745082056189, 0.701943955589503, 0.9996... 0.014467 0.998732 CTL06F 2 0.78263
CTL06F_3 9.101519 0.017721 0.835248 0.584532 [0.8352482294782076, 1.4114732831694714, 2.609... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.5845324628693316, 1.4277887166726817, 4.455... 0.035992 0.9908 CTL06F 3 9.686051
CTL06F_5 1.517014 0.028446 0.995236 1.080533 [0.9952359126703195, 2.065253135791535, 2.6851... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.0805325124857454, 1.9549269947108237, 2.566... 0.001386 0.996178 CTL06F 5 2.597547
CTL07C_2 0.247166 0.031126 0.098445 0.039209 [0.09844493853571196, 0.27541168560691187, 0.4... [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... [0.11911823217794204, 0.31373406561857875, 0.4... 0.004426 0.990575 CTL07C 2 0.286375
CTL07C_3 1.613569 0.035564 2.400426 2.061275 [2.400426353463839, 3.2856292908866864, 4.3438... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [2.0612745335144926, 3.198564283424051, 4.2362... 0.003228 0.998319 CTL07C 3 3.674844
CTL07C_4 6.062421 0.019101 0.478994 0.409045 [0.47899434749327985, 0.8008914021730558, 1.50... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.4090454254569629, 1.016575178648564, 3.2294... 0.13463 0.978949 CTL07C 4 6.471466
CTL07C_6 2.861219 0.030355 0.717828 0.424542 [0.7178284323935998, 1.3666883712911995, 2.714... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.42454248274224105, 2.2135828308679812, 3.49... 0.030066 0.982106 CTL07C 6 3.285761
CTL08A_1 1.187035 0.033521 0.33419 0.103732 [0.33418952506655986, 1.1351766731587196, 3.25... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.10373199917555276, 3.304242116545631, 4.849... 0.085182 0.980946 CTL08A 1 1.290767
CTL08A_2 0.706487 0.033213 0.165209 0.120179 [0.16520885806607996, 0.42682587784329584, 0.6... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [0.12017926217257657, 0.5763851701267808, 0.98... 0.02542 0.996042 CTL08A 2 0.826666
CTL08A_3 4.495572 0.021725 0.369839 0.164502 [0.3698385596056799, 0.6810055432149598, 1.573... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.16450164389087973, 0.680346996777927, 2.610... 0.151265 0.973682 CTL08A 3 4.660074
CTL08A_5 30.191408 0.003436 0.718488 0.720853 [0.7184884989571197, 0.8260278491119677, 3.779... [0.0, 16.0, 17.0, 24.0, 39.0, 48.0, 63.0] [0.7208533279078502, 2.4271379428335926, 2.536... 0.073788 0.886176 CTL08A 5 30.912261
CTL09A_2 0.263507 0.036516 0.416052 0.326098 [0.41605228590047993, 0.8020691253345598, 1.64... [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... [0.9418809126629182, 1.6397397810736654, 2.174... 0.041404 0.9842 CTL09A 2 0.589605
CTL09A_3 2.67779 0.028298 1.09468 0.4693 [1.0946797761170877, 2.2220952270454073, 3.557... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [1.8852781353057484, 3.07269361924069, 5.86530... 0.0525 0.98176 CTL09A 3 3.14709
CTL09A_4 3.743787 0.024477 0.856375 0.856375 [0.8563747115980797, 1.4846659608893753, 2.637... [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... [1.778987723662873, 3.849072862477047, 5.30617... 0.108845 0.990837 CTL09A 4 4.600161
CTL09A_5 0.645037 0.044788 0.999207 1.372094 [0.9992065878129598, 1.4917047366392635, 2.493... [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] [1.3720943628331366, 1.6500428832472247, 2.616... 0.013601 0.98726 CTL09A 5 2.017131
H1_1 0.881922 0.022767 0.127647 0.032898 [0.12764744386243193, 0.2877317192128319, 0.33... [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... [0.2334544692034437, 0.6740965916339468, 1.020... 0.103301 0.98154 H1 1 0.91482
H1_2 4.602859 0.013332 2.472889 2.472889 [2.4728886058766393, 2.915316716446655, 3.1268... [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... [2.472888605876677, 3.4918995137855164, 5.6118... 0.116448 0.957078 H1 2 7.075747
H9_1 4.27921 0.024254 0.377707 0.350742 [0.3777067706471999, 0.9652240400097598, 2.490... [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... [0.35074196350559994, 2.2284424262323013, 3.73... 0.046817 0.986148 H9 1 4.629952
H9_2 0.659132 0.045415 0.301872 0.153647 [0.30187165062959986, 1.850863636851551, 3.155... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [0.7971585632554442, 1.4548830570166933, 3.368... 0.01872 0.999084 H9 2 0.812778
H9_3 11.484083 0.015581 0.90592 0.3021 [0.9059195979950396, 1.1298801829973755, 1.425... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.30210013520928314, 1.2325485628117843, 4.50... 0.189097 0.975495 H9 3 11.786183
H9_5 1.264116 0.023808 0.809685 0.753878 [0.8096849757622797, 0.9034534152302397, 1.793... [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] [0.7538782216300566, 1.3399656458665576, 1.384... 0.01269 0.965619 H9 5 2.017994
KTD8.2_1 0.238791 0.041092 0.550485 0.550485 [0.5504846337575998, 0.9347023082408397, 1.524... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.5504846337576019, 1.4975108560653476, 2.028... 0.018369 0.986486 KTD8.2 1 0.789276
KTD8.2_2 0.377419 0.039801 0.713858 1.75361 [0.7138583617075197, 1.1289241745020795, 3.124... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [1.753609889235595, 2.06184030467382, 2.357191... 0.046724 0.987279 KTD8.2 2 2.131029
KTD8.2_3 2.067121 0.018607 0.107452 0.056472 [0.10745158306233597, 0.3914869295194559, 0.60... [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... [0.43331846896985615, 1.4698020953022803, 1.80... 0.144407 0.988663 KTD8.2 3 2.123593
KTD8.2_4 1.688852 0.026211 1.417346 1.347096 [1.4173458607295994, 2.1666846135959994, 2.887... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3470964626644197, 2.227020170936482, 2.8263... 0.002315 0.987125 KTD8.2 4 3.035949
UCSFi001-A_2 0.588666 0.026581 0.470745 0.461125 [0.4707453288189598, 1.1102174528831996, 1.745... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9865636607443292, 1.2876735048588026, 2.037... 0.031031 0.970674 UCSFi001-A 2 1.049792
UCSFi001-A_3 1.067308 0.025661 0.823938 0.73609 [0.8239383636753597, 1.0375904860577996, 1.344... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... [0.7360902739853485, 1.2371900959914608, 1.644... 0.007454 0.995604 UCSFi001-A 3 1.803399
UCSFi001-A_4 11.891799 0.016863 0.65714 0.65714 [0.6571397848564797, 1.3643807979277436, 3.360... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.6571397848564793, 2.374656891528188, 6.8928... 0.053775 0.99402 UCSFi001-A 4 12.548939
UCSFi001-A_5 0.695858 0.055867 1.776417 1.328369 [1.7764168326609595, 2.3372079343318073, 3.079... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3283694521265805, 2.333591404662628, 3.2921... 0.02179 0.981117 UCSFi001-A 5 2.024228
In [47]:
fitted_df[fitted_df['a+c'] < 0].shape
Out[47]:
(0, 12)
In [48]:
fitted_df_filtered = fitted_df[fitted_df['r2'] > 0.9]
fitted_df_filtered
Out[48]:
a rate min(y) c mean_cum split_time y_fitted MSLE r2 Line split a+c
CTL01A_2 0.140845 0.031439 0.128738 0.051989 [0.12873836706191996, 0.5126940096263999, 0.75... [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] [0.5684601783697514, 0.7564306877808943, 1.265... 0.033141 0.955038 CTL01A 2 0.192835
CTL01A_4 6.709435 0.020925 1.071547 0.93823 [1.0715473444571997, 1.7103960320254554, 3.865... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] [0.9382302189532821, 2.1608528979695825, 5.549... 0.024887 0.958803 CTL01A 4 7.647665
CTL02A_1 1.358287 0.02225 1.065226 0.550477 [1.0652259377527198, 1.4048785412085596, 2.313... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.4269662693898706, 3.144128958826294, 4.7098... 0.10339 0.974893 CTL02A 1 1.908765
CTL02A_2 0.954612 0.043378 1.26411 0.76608 [1.2641101587982075, 1.7939214715189675, 4.596... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] [1.6412790277239497, 2.5151392692655543, 4.993... 0.016467 0.992677 CTL02A 2 1.720692
CTL02A_3 16.613953 0.016347 1.649741 0.656578 [1.6497408713817594, 2.607119548807967, 3.9639... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.6565776402558363, 2.0715333393022206, 7.081... 0.11749 0.975446 CTL02A 3 17.27053
CTL02A_5 0.682361 0.037966 0.654197 0.588409 [0.6541966858658399, 0.9161832703838398, 1.754... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.5884094472451199, 1.1586862058833431, 1.603... 0.007286 0.983085 CTL02A 5 1.27077
CTL04E_2 0.821185 0.024744 0.299609 0.299609 [0.2996085652689599, 1.1036059070299196, 1.703... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9655562584883766, 1.3365081793448954, 2.239... 0.028769 0.985605 CTL04E 2 1.120794
CTL04E_3 1.306325 0.023063 2.123347 2.123347 [2.1233470930991993, 2.4315673509784794, 2.677... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [2.6632786987130626, 3.089169790618284, 4.0282... 0.068529 0.976291 CTL04E 3 3.429672
CTL04E_4 9.612778 0.017852 1.218989 0.859733 [1.2189894108551995, 2.1238454675329193, 2.880... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.8597330722454402, 2.3354356000019507, 6.266... 0.044278 0.990038 CTL04E 4 10.472511
CTL05A_1 0.831961 0.030813 0.779193 0.497694 [0.7791926205821758, 2.426386553802719, 4.5256... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.432703578982128, 3.3169982277731034, 5.4623... 0.052295 0.99813 CTL05A 1 1.329655
CTL05A_2 0.055631 0.043012 0.205236 0.491117 [0.20523627369755992, 0.5079725389905838, 0.63... [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... [0.5174145386527128, 0.5916687051661024, 0.665... 0.00577 0.999672 CTL05A 2 0.546748
CTL05A_3 2.543079 0.01896 1.430206 0.870664 [1.4302055733008394, 1.6829849546056073, 1.766... [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... [0.8706640646764798, 1.7072362370661094, 2.260... 0.079341 0.984739 CTL05A 3 3.413743
CTL06F_1 0.192261 0.038794 0.186632 0.149139 [0.18663200746559994, 0.5037948979217999, 0.99... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... [0.14913877596194025, 0.8297829559442185, 1.19... 0.008407 0.998507 CTL06F 1 0.3414
CTL06F_2 0.178955 0.048632 0.221248 0.603675 [0.22124802574367994, 0.5315105005565759, 0.98... [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... [0.6036745082056189, 0.701943955589503, 0.9996... 0.014467 0.998732 CTL06F 2 0.78263
CTL06F_3 9.101519 0.017721 0.835248 0.584532 [0.8352482294782076, 1.4114732831694714, 2.609... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.5845324628693316, 1.4277887166726817, 4.455... 0.035992 0.9908 CTL06F 3 9.686051
CTL06F_5 1.517014 0.028446 0.995236 1.080533 [0.9952359126703195, 2.065253135791535, 2.6851... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.0805325124857454, 1.9549269947108237, 2.566... 0.001386 0.996178 CTL06F 5 2.597547
CTL07C_2 0.247166 0.031126 0.098445 0.039209 [0.09844493853571196, 0.27541168560691187, 0.4... [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... [0.11911823217794204, 0.31373406561857875, 0.4... 0.004426 0.990575 CTL07C 2 0.286375
CTL07C_3 1.613569 0.035564 2.400426 2.061275 [2.400426353463839, 3.2856292908866864, 4.3438... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [2.0612745335144926, 3.198564283424051, 4.2362... 0.003228 0.998319 CTL07C 3 3.674844
CTL07C_4 6.062421 0.019101 0.478994 0.409045 [0.47899434749327985, 0.8008914021730558, 1.50... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.4090454254569629, 1.016575178648564, 3.2294... 0.13463 0.978949 CTL07C 4 6.471466
CTL07C_6 2.861219 0.030355 0.717828 0.424542 [0.7178284323935998, 1.3666883712911995, 2.714... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.42454248274224105, 2.2135828308679812, 3.49... 0.030066 0.982106 CTL07C 6 3.285761
CTL08A_1 1.187035 0.033521 0.33419 0.103732 [0.33418952506655986, 1.1351766731587196, 3.25... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.10373199917555276, 3.304242116545631, 4.849... 0.085182 0.980946 CTL08A 1 1.290767
CTL08A_2 0.706487 0.033213 0.165209 0.120179 [0.16520885806607996, 0.42682587784329584, 0.6... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [0.12017926217257657, 0.5763851701267808, 0.98... 0.02542 0.996042 CTL08A 2 0.826666
CTL08A_3 4.495572 0.021725 0.369839 0.164502 [0.3698385596056799, 0.6810055432149598, 1.573... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.16450164389087973, 0.680346996777927, 2.610... 0.151265 0.973682 CTL08A 3 4.660074
CTL09A_2 0.263507 0.036516 0.416052 0.326098 [0.41605228590047993, 0.8020691253345598, 1.64... [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... [0.9418809126629182, 1.6397397810736654, 2.174... 0.041404 0.9842 CTL09A 2 0.589605
CTL09A_3 2.67779 0.028298 1.09468 0.4693 [1.0946797761170877, 2.2220952270454073, 3.557... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [1.8852781353057484, 3.07269361924069, 5.86530... 0.0525 0.98176 CTL09A 3 3.14709
CTL09A_4 3.743787 0.024477 0.856375 0.856375 [0.8563747115980797, 1.4846659608893753, 2.637... [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... [1.778987723662873, 3.849072862477047, 5.30617... 0.108845 0.990837 CTL09A 4 4.600161
CTL09A_5 0.645037 0.044788 0.999207 1.372094 [0.9992065878129598, 1.4917047366392635, 2.493... [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] [1.3720943628331366, 1.6500428832472247, 2.616... 0.013601 0.98726 CTL09A 5 2.017131
H1_1 0.881922 0.022767 0.127647 0.032898 [0.12764744386243193, 0.2877317192128319, 0.33... [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... [0.2334544692034437, 0.6740965916339468, 1.020... 0.103301 0.98154 H1 1 0.91482
H1_2 4.602859 0.013332 2.472889 2.472889 [2.4728886058766393, 2.915316716446655, 3.1268... [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... [2.472888605876677, 3.4918995137855164, 5.6118... 0.116448 0.957078 H1 2 7.075747
H9_1 4.27921 0.024254 0.377707 0.350742 [0.3777067706471999, 0.9652240400097598, 2.490... [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... [0.35074196350559994, 2.2284424262323013, 3.73... 0.046817 0.986148 H9 1 4.629952
H9_2 0.659132 0.045415 0.301872 0.153647 [0.30187165062959986, 1.850863636851551, 3.155... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [0.7971585632554442, 1.4548830570166933, 3.368... 0.01872 0.999084 H9 2 0.812778
H9_3 11.484083 0.015581 0.90592 0.3021 [0.9059195979950396, 1.1298801829973755, 1.425... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.30210013520928314, 1.2325485628117843, 4.50... 0.189097 0.975495 H9 3 11.786183
H9_5 1.264116 0.023808 0.809685 0.753878 [0.8096849757622797, 0.9034534152302397, 1.793... [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] [0.7538782216300566, 1.3399656458665576, 1.384... 0.01269 0.965619 H9 5 2.017994
KTD8.2_1 0.238791 0.041092 0.550485 0.550485 [0.5504846337575998, 0.9347023082408397, 1.524... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.5504846337576019, 1.4975108560653476, 2.028... 0.018369 0.986486 KTD8.2 1 0.789276
KTD8.2_2 0.377419 0.039801 0.713858 1.75361 [0.7138583617075197, 1.1289241745020795, 3.124... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [1.753609889235595, 2.06184030467382, 2.357191... 0.046724 0.987279 KTD8.2 2 2.131029
KTD8.2_3 2.067121 0.018607 0.107452 0.056472 [0.10745158306233597, 0.3914869295194559, 0.60... [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... [0.43331846896985615, 1.4698020953022803, 1.80... 0.144407 0.988663 KTD8.2 3 2.123593
KTD8.2_4 1.688852 0.026211 1.417346 1.347096 [1.4173458607295994, 2.1666846135959994, 2.887... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3470964626644197, 2.227020170936482, 2.8263... 0.002315 0.987125 KTD8.2 4 3.035949
UCSFi001-A_2 0.588666 0.026581 0.470745 0.461125 [0.4707453288189598, 1.1102174528831996, 1.745... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9865636607443292, 1.2876735048588026, 2.037... 0.031031 0.970674 UCSFi001-A 2 1.049792
UCSFi001-A_3 1.067308 0.025661 0.823938 0.73609 [0.8239383636753597, 1.0375904860577996, 1.344... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... [0.7360902739853485, 1.2371900959914608, 1.644... 0.007454 0.995604 UCSFi001-A 3 1.803399
UCSFi001-A_4 11.891799 0.016863 0.65714 0.65714 [0.6571397848564797, 1.3643807979277436, 3.360... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.6571397848564793, 2.374656891528188, 6.8928... 0.053775 0.99402 UCSFi001-A 4 12.548939
UCSFi001-A_5 0.695858 0.055867 1.776417 1.328369 [1.7764168326609595, 2.3372079343318073, 3.079... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3283694521265805, 2.333591404662628, 3.2921... 0.02179 0.981117 UCSFi001-A 5 2.024228
In [49]:
fig, ax = plt.subplots()
sns.barplot(data = fitted_df_filtered, x = 'split', y = 'rate', order = ['1', '2', '3', '4', '5', '6'], ax = ax)#, hue = 'Line', palette=line_palette)
_ = ax.set_ylabel('Growth rate', fontsize = 20)
_ = ax.set_yticklabels(ax.get_yticklabels(), fontsize = 15)
_ = ax.set_xlabel('Passage', fontsize = 20)
_ = ax.set_xticklabels(ax.get_xticklabels(), fontsize = 15)
In [50]:
fig, ax = plt.subplots(figsize = (10,4))
fitted_df_filtered['split'] = fitted_df_filtered['split'].astype('int')
fitted_df_filtered['rate'] = fitted_df_filtered['rate'].astype('float64')
sns.lineplot(data = fitted_df_filtered, x = 'split', y = 'rate', hue = 'Line',
              ax = ax, palette=line_palette)
_ = ax.set_ylabel('Growth rate', fontsize = 20)
_ = ax.set_yticklabels(ax.get_yticklabels(), fontsize = 15)
_ = ax.set_xlabel('Passage', fontsize = 20)
_ = ax.set_xticklabels(ax.get_xticklabels(), fontsize = 15)
ax.legend(bbox_to_anchor = (1,1))
Out[50]:
<matplotlib.legend.Legend at 0x7fe48429deb0>
In [51]:
fig, ax = plt.subplots()
sns.boxplot(data = fitted_df_filtered, x = 'Line', y = 'rate', ax = ax, palette=line_palette)
_ = ax.set_ylabel('Rate of area growth', fontsize = 20)
_ = ax.set_yticklabels(ax.get_yticklabels(), fontsize = 15)
_ = ax.set_xlabel('Passage', fontsize = 20)
_ = ax.set_xticklabels(ax.get_xticklabels(), fontsize = 15, rotation = 90)
In [52]:
fitted_df_filtered.sort_values(by = 'rate').to_csv('../../data/csv/iPSC_fitted_exp_area_sum_per_split.csv')
In [53]:
fitted_df_filtered.sort_values(by = 'rate')
Out[53]:
a rate min(y) c mean_cum split_time y_fitted MSLE r2 Line split a+c
H1_2 4.602859 0.013332 2.472889 2.472889 [2.4728886058766393, 2.915316716446655, 3.1268... [0.0, 15.0, 39.0, 47.0, 64.0, 72.0, 87.0, 95.0... [2.472888605876677, 3.4918995137855164, 5.6118... 0.116448 0.957078 H1 2 7.075747
H9_3 11.484083 0.015581 0.90592 0.3021 [0.9059195979950396, 1.1298801829973755, 1.425... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.30210013520928314, 1.2325485628117843, 4.50... 0.189097 0.975495 H9 3 11.786183
CTL02A_3 16.613953 0.016347 1.649741 0.656578 [1.6497408713817594, 2.607119548807967, 3.9639... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.6565776402558363, 2.0715333393022206, 7.081... 0.11749 0.975446 CTL02A 3 17.27053
UCSFi001-A_4 11.891799 0.016863 0.65714 0.65714 [0.6571397848564797, 1.3643807979277436, 3.360... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.6571397848564793, 2.374656891528188, 6.8928... 0.053775 0.99402 UCSFi001-A 4 12.548939
CTL06F_3 9.101519 0.017721 0.835248 0.584532 [0.8352482294782076, 1.4114732831694714, 2.609... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.5845324628693316, 1.4277887166726817, 4.455... 0.035992 0.9908 CTL06F 3 9.686051
CTL04E_4 9.612778 0.017852 1.218989 0.859733 [1.2189894108551995, 2.1238454675329193, 2.880... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0, 81.0,... [0.8597330722454402, 2.3354356000019507, 6.266... 0.044278 0.990038 CTL04E 4 10.472511
KTD8.2_3 2.067121 0.018607 0.107452 0.056472 [0.10745158306233597, 0.3914869295194559, 0.60... [9.0, 28.0, 33.0, 48.0, 57.0, 72.0, 80.0, 96.0... [0.43331846896985615, 1.4698020953022803, 1.80... 0.144407 0.988663 KTD8.2 3 2.123593
CTL05A_3 2.543079 0.018960 1.430206 0.870664 [1.4302055733008394, 1.6829849546056073, 1.766... [0.0, 15.0, 23.0, 39.0, 47.0, 64.0, 72.0, 87.0... [0.8706640646764798, 1.7072362370661094, 2.260... 0.079341 0.984739 CTL05A 3 3.413743
CTL07C_4 6.062421 0.019101 0.478994 0.409045 [0.47899434749327985, 0.8008914021730558, 1.50... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.4090454254569629, 1.016575178648564, 3.2294... 0.13463 0.978949 CTL07C 4 6.471466
CTL01A_4 6.709435 0.020925 1.071547 0.93823 [1.0715473444571997, 1.7103960320254554, 3.865... [0.0, 8.0, 25.0, 33.0, 48.0, 56.0, 73.0] [0.9382302189532821, 2.1608528979695825, 5.549... 0.024887 0.958803 CTL01A 4 7.647665
CTL08A_3 4.495572 0.021725 0.369839 0.164502 [0.3698385596056799, 0.6810055432149598, 1.573... [0.0, 5.0, 20.0, 29.0, 44.0, 52.0, 68.0, 76.0,... [0.16450164389087973, 0.680346996777927, 2.610... 0.151265 0.973682 CTL08A 3 4.660074
CTL02A_1 1.358287 0.022250 1.065226 0.550477 [1.0652259377527198, 1.4048785412085596, 2.313... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.4269662693898706, 3.144128958826294, 4.7098... 0.10339 0.974893 CTL02A 1 1.908765
H1_1 0.881922 0.022767 0.127647 0.032898 [0.12764744386243193, 0.2877317192128319, 0.33... [9.0, 24.0, 33.0, 48.0, 57.0, 72.0, 83.0, 96.0... [0.2334544692034437, 0.6740965916339468, 1.020... 0.103301 0.98154 H1 1 0.91482
CTL04E_3 1.306325 0.023063 2.123347 2.123347 [2.1233470930991993, 2.4315673509784794, 2.677... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [2.6632786987130626, 3.089169790618284, 4.0282... 0.068529 0.976291 CTL04E 3 3.429672
H9_5 1.264116 0.023808 0.809685 0.753878 [0.8096849757622797, 0.9034534152302397, 1.793... [0.0, 16.0, 17.0, 39.0, 48.0, 63.0] [0.7538782216300566, 1.3399656458665576, 1.384... 0.01269 0.965619 H9 5 2.017994
H9_1 4.27921 0.024254 0.377707 0.350742 [0.3777067706471999, 0.9652240400097598, 2.490... [0.0, 15.0, 24.0, 40.0, 48.0, 63.0, 72.0, 87.0... [0.35074196350559994, 2.2284424262323013, 3.73... 0.046817 0.986148 H9 1 4.629952
CTL09A_4 3.743787 0.024477 0.856375 0.856375 [0.8563747115980797, 1.4846659608893753, 2.637... [9.0, 24.0, 32.0, 48.0, 56.0, 73.0, 81.0, 96.0... [1.778987723662873, 3.849072862477047, 5.30617... 0.108845 0.990837 CTL09A 4 4.600161
CTL04E_2 0.821185 0.024744 0.299609 0.299609 [0.2996085652689599, 1.1036059070299196, 1.703... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9655562584883766, 1.3365081793448954, 2.239... 0.028769 0.985605 CTL04E 2 1.120794
UCSFi001-A_3 1.067308 0.025661 0.823938 0.73609 [0.8239383636753597, 1.0375904860577996, 1.344... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0... [0.7360902739853485, 1.2371900959914608, 1.644... 0.007454 0.995604 UCSFi001-A 3 1.803399
KTD8.2_4 1.688852 0.026211 1.417346 1.347096 [1.4173458607295994, 2.1666846135959994, 2.887... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3470964626644197, 2.227020170936482, 2.8263... 0.002315 0.987125 KTD8.2 4 3.035949
UCSFi001-A_2 0.588666 0.026581 0.470745 0.461125 [0.4707453288189598, 1.1102174528831996, 1.745... [24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105... [0.9865636607443292, 1.2876735048588026, 2.037... 0.031031 0.970674 UCSFi001-A 2 1.049792
CTL09A_3 2.67779 0.028298 1.09468 0.4693 [1.0946797761170877, 2.2220952270454073, 3.557... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.... [1.8852781353057484, 3.07269361924069, 5.86530... 0.0525 0.98176 CTL09A 3 3.14709
CTL06F_5 1.517014 0.028446 0.995236 1.080533 [0.9952359126703195, 2.065253135791535, 2.6851... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.0805325124857454, 1.9549269947108237, 2.566... 0.001386 0.996178 CTL06F 5 2.597547
CTL07C_6 2.861219 0.030355 0.717828 0.424542 [0.7178284323935998, 1.3666883712911995, 2.714... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.42454248274224105, 2.2135828308679812, 3.49... 0.030066 0.982106 CTL07C 6 3.285761
CTL05A_1 0.831961 0.030813 0.779193 0.497694 [0.7791926205821758, 2.426386553802719, 4.5256... [39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0, 12... [2.432703578982128, 3.3169982277731034, 5.4623... 0.052295 0.99813 CTL05A 1 1.329655
CTL07C_2 0.247166 0.031126 0.098445 0.039209 [0.09844493853571196, 0.27541168560691187, 0.4... [9.0, 24.0, 33.0, 49.0, 57.0, 72.0, 81.0, 96.0... [0.11911823217794204, 0.31373406561857875, 0.4... 0.004426 0.990575 CTL07C 2 0.286375
CTL01A_2 0.140845 0.031439 0.128738 0.051989 [0.12873836706191996, 0.5126940096263999, 0.75... [49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 120.0] [0.5684601783697514, 0.7564306877808943, 1.265... 0.033141 0.955038 CTL01A 2 0.192835
CTL08A_2 0.706487 0.033213 0.165209 0.120179 [0.16520885806607996, 0.42682587784329584, 0.6... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [0.12017926217257657, 0.5763851701267808, 0.98... 0.02542 0.996042 CTL08A 2 0.826666
CTL08A_1 1.187035 0.033521 0.33419 0.103732 [0.33418952506655986, 1.1351766731587196, 3.25... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.10373199917555276, 3.304242116545631, 4.849... 0.085182 0.980946 CTL08A 1 1.290767
CTL07C_3 1.613569 0.035564 2.400426 2.061275 [2.400426353463839, 3.2856292908866864, 4.3438... [0.0, 15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [2.0612745335144926, 3.198564283424051, 4.2362... 0.003228 0.998319 CTL07C 3 3.674844
CTL09A_2 0.263507 0.036516 0.416052 0.326098 [0.41605228590047993, 0.8020691253345598, 1.64... [33.0, 49.0, 57.0, 72.0, 81.0, 96.0, 105.0, 12... [0.9418809126629182, 1.6397397810736654, 2.174... 0.041404 0.9842 CTL09A 2 0.589605
CTL02A_5 0.682361 0.037966 0.654197 0.588409 [0.6541966858658399, 0.9161832703838398, 1.754... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [0.5884094472451199, 1.1586862058833431, 1.603... 0.007286 0.983085 CTL02A 5 1.27077
CTL06F_1 0.192261 0.038794 0.186632 0.149139 [0.18663200746559994, 0.5037948979217999, 0.99... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.... [0.14913877596194025, 0.8297829559442185, 1.19... 0.008407 0.998507 CTL06F 1 0.3414
KTD8.2_2 0.377419 0.039801 0.713858 1.75361 [0.7138583617075197, 1.1289241745020795, 3.124... [0.0, 15.0, 24.0, 39.0, 48.0, 63.0, 72.0, 87.0... [1.753609889235595, 2.06184030467382, 2.357191... 0.046724 0.987279 KTD8.2 2 2.131029
KTD8.2_1 0.238791 0.041092 0.550485 0.550485 [0.5504846337575998, 0.9347023082408397, 1.524... [0.0, 39.0, 48.0, 63.0, 72.0, 88.0, 96.0, 111.0] [0.5504846337576019, 1.4975108560653476, 2.028... 0.018369 0.986486 KTD8.2 1 0.789276
CTL05A_2 0.055631 0.043012 0.205236 0.491117 [0.20523627369755992, 0.5079725389905838, 0.63... [9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0, 96.0... [0.5174145386527128, 0.5916687051661024, 0.665... 0.00577 0.999672 CTL05A 2 0.546748
CTL02A_2 0.954612 0.043378 1.26411 0.76608 [1.2641101587982075, 1.7939214715189675, 4.596... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0, 96.0] [1.6412790277239497, 2.5151392692655543, 4.993... 0.016467 0.992677 CTL02A 2 1.720692
CTL09A_5 0.645037 0.044788 0.999207 1.372094 [0.9992065878129598, 1.4917047366392635, 2.493... [0.0, 8.0, 24.0, 32.0, 47.0, 56.0, 71.0] [1.3720943628331366, 1.6500428832472247, 2.616... 0.013601 0.98726 CTL09A 5 2.017131
H9_2 0.659132 0.045415 0.301872 0.153647 [0.30187165062959986, 1.850863636851551, 3.155... [15.0, 24.0, 39.0, 50.0, 63.0, 72.0, 87.0] [0.7971585632554442, 1.4548830570166933, 3.368... 0.01872 0.999084 H9 2 0.812778
CTL06F_2 0.178955 0.048632 0.221248 0.603675 [0.22124802574367994, 0.5315105005565759, 0.98... [0.0, 9.0, 24.0, 33.0, 48.0, 59.0, 72.0, 81.0,... [0.6036745082056189, 0.701943955589503, 0.9996... 0.014467 0.998732 CTL06F 2 0.78263
UCSFi001-A_5 0.695858 0.055867 1.776417 1.328369 [1.7764168326609595, 2.3372079343318073, 3.079... [0.0, 16.0, 24.0, 39.0, 48.0, 63.0] [1.3283694521265805, 2.333591404662628, 3.2921... 0.02179 0.981117 UCSFi001-A 5 2.024228
In [54]:
fitted_df['split'] = fitted_df['split'].astype('int')
fitted_df['rate'] = fitted_df['rate'].astype('float64')
In [55]:
pd.DataFrame(fitted_df.groupby('Line')['rate'].mean().sort_values())
Out[55]:
rate
Line
H1 0.018050
CTL08A 0.022974
CTL04E 0.025820
CTL01A 0.026182
H9 0.027264
CTL07C 0.029036
CTL02A 0.029985
CTL05A 0.030928
UCSFi001-A 0.031243
KTD8.2 0.031428
CTL06F 0.033399
CTL09A 0.033520
In [56]:
data_tot = []

for line_n in fitted_df.index:
    sub = fitted_df.loc[line_n]
    data = pd.DataFrame({'mean_cum': sub['mean_cum'], 'split_time': sub['split_time'], 'line_n': [line_n] * len(sub['mean_cum'])})
    data_tot.append(data)

data_tot = pd.concat(data_tot)

data_tot['Line'] = data_tot['line_n'].apply(lambda x: x.split('_')[0]).values
data_tot['split'] = data_tot['line_n'].apply(lambda x: x.split('_')[1]).values
In [57]:
fig, ax = plt.subplots(4,3, figsize = (20, 20))
ax = ax.flatten().T
ax_index = 0
fitted_param_line = {}

for line in order:

    fitted_param_line[line] = {}
    sub = data_tot[data_tot.Line == line]
    sub = sub.sort_values('split_time')
    
    if len(sub.split_time.unique()) > 5:
        # Calculate the cumulative sum of the mean total area for each split_time
        y = sub['mean_cum'].cumsum().values
        x = np.array(sub.split_time)

        try:
            popt, pcov = curve_fit(exp_model, x, y, p0=(max(y), 0.1, 0))

            fitted_param_line[line]['intercept'] = popt[0]
            fitted_param_line[line]['rate'] = popt[1]
            fitted_param_line[line]['mean_cum'] = y
            fitted_param_line[line]['split_time'] = x
            
            a, b, c = popt
            
            # Generate fitted y values
            y_fitted = exp_model(x, a, b, c)

            fitted_param_line[line]['y_fitted'] = y_fitted
            #fitted_param_line[line]['MSLE'] = mean_squared_log_error(y, y_fitted)
            fitted_param_line[line]['r2'] = r2_score(y, y_fitted)
            
            sns.lineplot(y = y_fitted, x = x, ax = ax[ax_index], markers = True)
            sns.scatterplot(y = y, x = x, ax = ax[ax_index], markers = True)
            ax[ax_index].xaxis.set_tick_params(rotation=90)
            ax[ax_index].set_title(f'{line}', fontsize = 30)
            ax[ax_index].set_ylabel('Increase in area ', fontsize = 20)
            ax[ax_index].set_xlabel('Time point', fontsize = 20)
            _ = ax[ax_index].set_xticklabels(ax[ax_index].get_xticklabels(), fontsize = 15)
            _ = ax[ax_index].set_yticklabels(ax[ax_index].get_yticklabels(), fontsize = 15)
            ax_index += 1
        
        except RuntimeError as e:
            print(f"Fitting failed for line_n {line_n}: {e}")
            
plt.tight_layout()
Fitting failed for line_n UCSFi001-A_5: Optimal parameters not found: Number of calls to function has reached maxfev = 800.
In [58]:
fitted_df_line = pd.DataFrame.from_dict(fitted_param_line).T
fitted_df_line = fitted_df_line[~fitted_df_line.intercept.isna()]
fitted_df_line.sort_values('rate')
Out[58]:
intercept rate mean_cum split_time y_fitted r2
CTL04E 204.245148 0.009307 [1.2189894108551995, 2.8758556161662385, 4.999... [0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 24.0, ... [-35.001497262974425, -35.001497262974425, -19... 0.977045
KTD8.2 82.65117 0.010281 [0.5504846337575998, 1.9678304944871994, 2.681... [0.0, 0.0, 0.0, 9.0, 15.0, 16.0, 24.0, 24.0, 2... [-15.476692162327979, -15.476692162327979, -15... 0.98984
H9 110.894548 0.013802 [0.3777067706471999, 1.1873917464094796, 2.093... [0.0, 0.0, 0.0, 5.0, 15.0, 15.0, 16.0, 17.0, 2... [-37.324349401025074, -37.324349401025074, -37... 0.986461
UCSFi001-A 102.618779 0.013856 [0.8239383636753597, 1.4810781485318394, 3.257... [0.0, 0.0, 0.0, 8.0, 15.0, 16.0, 24.0, 24.0, 2... [-28.756872674622286, -28.756872674622286, -28... 0.975132
CTL07C 62.000942 0.015562 [0.7178284323935998, 1.1968227798868796, 3.597... [0.0, 0.0, 0.0, 5.0, 9.0, 15.0, 16.0, 20.0, 24... [-9.858067505526257, -9.858067505526257, -9.85... 0.996057
CTL05A 33.125403 0.017384 [1.4302055733008394, 1.6354418469983993, 3.318... [0.0, 9.0, 15.0, 23.0, 24.0, 33.0, 39.0, 39.0,... [-33.80482906977792, -28.194803471203237, -23.... 0.95798
CTL02A 69.758992 0.018447 [1.6497408713817594, 2.3039375572475995, 4.911... [0.0, 0.0, 5.0, 15.0, 16.0, 20.0, 24.0, 24.0, ... [-36.696433146177405, -36.696433146177405, -29... 0.990371
CTL09A 51.576489 0.019058 [0.9992065878129598, 2.4909113244522234, 3.347... [0.0, 8.0, 9.0, 15.0, 24.0, 24.0, 24.0, 32.0, ... [-29.09169724724024, -20.597230803579677, -19.... 0.990286
H1 13.460639 0.019555 [2.4728886058766393, 2.600536049739071, 5.5158... [0.0, 9.0, 15.0, 24.0, 33.0, 39.0, 47.0, 48.0,... [-10.695057299386022, -8.104773073130806, -6.1... 0.987598
CTL08A 39.333914 0.019842 [0.33418952506655986, 1.0526780240236797, 1.21... [0.0, 0.0, 0.0, 0.0, 5.0, 15.0, 16.0, 17.0, 20... [-21.429730367136756, -21.429730367136756, -21... 0.984026
CTL06F 37.338291 0.019961 [0.18663200746559994, 1.1818679201359195, 1.40... [0.0, 0.0, 0.0, 0.0, 5.0, 9.0, 16.0, 20.0, 24.... [-16.228638125874095, -16.228638125874095, -16... 0.993179

Cumulative sum of the discrete derivative of the curves - per line per split¶

We use the logarithm of the area and then sum all the FOV for a specific time points in each line at each split. Then we smoothed it with gaussian_filter1d and computed the diff, corresponding to the first discrete derivative. We then use the cumsum() function to obtain the cumulative sum over the discrete differential of the growth.

In [59]:
fig, ax = plt.subplots(10,5, figsize = (5*10, 7*9), gridspec_kw={'hspace': 0.7})
ax = ax.flatten().T
ax_index = 0

for l in total_df.line.unique():
    
    #color = color_dict[l]

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    for split in sub.n_split.unique():

        subsub = sub[sub.n_split == split]

        
        # user defined function,
        # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
        summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
        
        summary_df = summary_df.sort_values('split_time')
        
        summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
        summary_df['derivative'] = summary_df['smoothed'].diff() / summary_df['split_time'].diff()
        summary_df['cumulative'] = summary_df['derivative'].cumsum()
        
        farray = np.array(summary_df['cumulative'])
        
        y_pos = summary_df.split_time.values
        
        ax[ax_index].errorbar(y_pos, farray, marker = 'o')

        ax[ax_index].xaxis.set_tick_params(rotation=90)
        ax[ax_index].set_title(f'{l}_split_{split}')
        ax[ax_index].set_ylabel('Cumulative growth')
        ax[ax_index].set_xlabel('Time point')
        ax_index += 1
        
plt.show()
In [60]:
sns.set(style="white", palette="Paired", color_codes=True)
fig, ax = plt.subplots(figsize=(7,5))
labels = []
lc = []
handles = []
all_lines = {}

total_df_no_first = total_df[total_df.n_split != '1'].copy()

cumulative_dict_dfs = {}

for l in total_df.line.unique():
    
    color = line_palette[l]

    sub = total_df[total_df.line == l]
    sub = sub.sort_values(by = 'datetime')
    sub = sub[sub.n_split != 'day']

    for split in sub.n_split.unique():


        subsub = sub[sub.n_split == split]

        if len(subsub.split_time.unique()) > 0:
    
            # user defined function,
            # with `area_sum` return the sum of the areas of all the field of view captured for that line at that time point
            summary_df = preprocess(subsub, original_v='Area (microm2)', final_output='mean')
            
            summary_df = summary_df.sort_values('split_time')
            
            summary_df['smoothed'] = gaussian_filter1d(summary_df['mean'], 3)
            summary_df['derivative'] = summary_df['smoothed'].diff() / summary_df['split_time'].diff()
            summary_df['cumulative'] = summary_df['derivative'].cumsum()
    
            cumulative_dict_dfs[f'{l}_{split}'] = summary_df
            
            farray = np.array(summary_df['cumulative'])
            
            y_pos = summary_df.split_time.values
    
            line, = ax.plot(y_pos, farray, color = color, marker = '.')
            ax.xaxis.set_tick_params(rotation=90)
            all_lines[f'{l}_{split}'] = line
    
            labels.append(l)
            handles.append(line)
            lc.append(color)
    
plt.legend(handles, labels, bbox_to_anchor = (1,1))
Out[60]:
<matplotlib.legend.Legend at 0x7fe47fcdcb50>
In [61]:
lines = total_df.line.unique()
In [62]:
highlight_growth_curves(all_lines, 
                        xlabel = 'Hours from split', 
                        ylabel = 'Cumulative growth', 
                        lines = lines, fontsize = 20)
#plt.savefig('./figures/cumulative_growth_curve_per_line.svg', dpi = 300)

Fit linear model¶

I fit here a linear regression model taking into account all the cumulatve sums of all the splits for each line. We extrapolate the slope as the rate of growth of the line.

In [63]:
cumulative_df = pd.concat(cumulative_dict_dfs.values(), keys = cumulative_dict_dfs.keys()).reset_index()
cumulative_df['line'] = cumulative_df.level_0.apply(lambda x: x.split('_')[0])
In [64]:
cumulative_df
Out[64]:
level_0 level_1 time_point mean stds hour month day datetime split_time smoothed derivative cumulative line
0 H1_1 0 06_11_23_t18 6.702764e+04 41616.484602 18 11 06 2023-11-06 18:00:00 0.0 151997.345803 NaN NaN H1
1 H1_1 2 07_11_23_t9 8.406022e+04 130798.841698 9 11 07 2023-11-07 09:00:00 15.0 189847.913490 2523.371179 2523.371179 H1
2 H1_1 1 07_11_23_t18 2.654972e+04 16684.343834 18 11 07 2023-11-07 18:00:00 24.0 269350.442789 8833.614367 11356.985546 H1
3 H1_1 4 08_11_23_t9 1.007394e+05 141517.200188 9 11 08 2023-11-08 09:00:00 39.0 396380.883270 8468.696032 19825.681578 H1
4 H1_1 3 08_11_23_t18 7.603012e+04 84318.866374 18 11 08 2023-11-08 18:00:00 48.0 576089.795294 19967.656892 39793.338469 H1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
459 CTL01A_5 3 20_11_23_t9 2.595179e+05 88067.869959 9 11 20 2023-11-20 09:00:00 23.0 586571.010023 4211.355442 8525.392117 CTL01A
460 CTL01A_5 2 20_11_23_t18 8.738935e+05 625314.219906 18 11 20 2023-11-20 18:00:00 32.0 667181.708241 8956.744246 17482.136364 CTL01A
461 CTL01A_5 5 21_11_23_t9 7.845210e+05 489648.194587 9 11 21 2023-11-21 09:00:00 47.0 749884.816544 5513.540554 22995.676917 CTL01A
462 CTL01A_5 4 21_11_23_t17 5.332625e+05 NaN 17 11 21 2023-11-21 17:00:00 55.0 817761.601808 8484.598158 31480.275075 CTL01A
463 CTL01A_5 6 22_11_23_t10 1.589454e+06 NaN 10 11 22 2023-11-22 10:00:00 72.0 856047.818918 2252.130418 33732.405494 CTL01A

464 rows × 14 columns

In [65]:
# Initialize a grid of plots with an Axes for each walk
grid = sns.FacetGrid(cumulative_df.sort_values(by = 'split_time'), col="line", palette=line_palette,
                     col_wrap=4, height=5)

grid.map(sns.regplot, "split_time", "cumulative", order = 1)

# Adjust the arrangement of the plots
grid.fig.tight_layout(w_pad=1)
In [66]:
fitted_model = {}
for l in cumulative_df.line.unique():
    
    sub = cumulative_df[cumulative_df.line == l]
    sub = sub.sort_values(by = 'datetime')

    key = f'{l}'
    fitted_model[key] = {}
    fitted_model[key]['slope'], fitted_model[key]['intercept'], fitted_model[key]['rvalue'], fitted_model[key]['pvalue'], fitted_model[key]['stderr'] = stats.linregress(sub['split_time'], sub['cumulative'].fillna(0))
In [67]:
fitted_model_df = pd.DataFrame.from_dict(fitted_model).T.reset_index()
fitted_model_df['line'] = fitted_model_df['index'].apply(lambda x: x.split('_')[0])
fitted_model_df.sort_values(by = 'slope')
Out[67]:
index slope intercept rvalue pvalue stderr line
11 CTL01A 889.158749 -3317.415096 0.658753 3.427561e-04 211.747106 CTL01A
0 H1 1507.258668 -24661.605065 0.929554 3.246271e-13 115.050563 H1
1 CTL04E 2701.742613 -29608.142008 0.849852 4.029196e-12 271.791859 CTL04E
3 CTL05A 2763.552397 -34043.044124 0.858594 5.454580e-12 275.014498 CTL05A
5 KTD8.2 2930.111544 -64281.188697 0.961890 3.878081e-24 131.700799 KTD8.2
10 UCSFi001-A 3326.563394 -48138.442881 0.873562 1.020601e-14 285.989085 UCSFi001-A
4 H9 5258.813593 -86560.673508 0.940884 5.551798e-19 311.244699 H9
9 CTL07C 5615.863178 -135367.457446 0.872476 2.224665e-13 510.241618 CTL07C
8 CTL08A 5884.302391 -142089.468408 0.888082 9.120149e-16 469.977991 CTL08A
7 CTL06F 6314.448619 -150942.297102 0.905129 3.365469e-17 457.644407 CTL06F
6 CTL09A 6938.961586 -121941.446019 0.895983 1.307039e-14 565.410212 CTL09A
2 CTL02A 7219.721473 -126601.267281 0.858920 1.340509e-12 698.296028 CTL02A
In [68]:
fitted_model_df.rvalue.mean()
Out[68]:
0.8744732639082721
In [69]:
fitted_model_df.sort_values(by = 'slope').to_csv('../../data/csv/iPSC_fitted_lm_grouped.csv')